# Title: Measuring Interethnic Marriage in Africa
# Author: Branden Bohrnsen, bohrnsen@umich.edu
# Date: 1/30/2026
# Description: Generate figures and tables for interethnic marriage analysis.

library(tidyverse)
library(knitr)
library(ggpubr)
library(here)

basic_summary <- read_csv(here("out", "final", "basic_summary.csv"))
basic_by_decade <- read_csv(here("out", "final", "basic_by_decade.csv"))
basic_by_geography <- read_csv(here("out", "final", "basic_by_geography.csv"))
basic_by_decade_urban_rural <- read_csv(here("out", "final", "basic_by_decade_urban_rural.csv"))

elf_overall <- read_csv(here("out", "final", "elf_overall.csv"))
elf_by_decade <- read_csv(here("out", "final", "elf_by_decade.csv"))
elf_province <- read_csv(here("out", "final", "elf_province.csv"))
elf_urban_rural <- read_csv(here("out", "final", "elf_urban_rural.csv"))
elf_urban_rural_by_decade <- read_csv(here("out", "final", "elf_urban_rural_by_decade.csv"))

marriages_overall <- read_csv(here("out", "final", "marriages_overall.csv"))
marriages_by_decade <- read_csv(here("out", "final", "marriages_by_decade.csv"))
marriages_province <- read_csv(here("out", "final", "marriages_province.csv"))
marriages_urban_rural <- read_csv(here("out", "final", "marriages_urban_rural.csv"))
marriages_urban_rural_by_decade <- read_csv(here("out", "final", "marriages_urban_rural_by_decade.csv"))

kable(elf_overall %>% select(-contains("_se")), digits = 3)
kable(elf_urban_rural %>% select(-contains("_se")), digits = 3)
kable(basic_summary %>% select(-contains("_se")), digits = 3)
kable(basic_by_geography %>% select(-contains("_se")), digits = 3)

kable(marriages_by_decade %>% select(-contains("_se")), digits = 3)
kable(marriages_overall %>% select(-contains("_se")), digits = 3)
kable(marriages_province %>% select(-contains("_se")), digits = 3)
kable(marriages_urban_rural %>% select(-contains("_se")), digits = 3)

marriages_urban_rural_by_decade <- marriages_urban_rural_by_decade %>%
  mutate(Urban = if_else(Urban == TRUE, "Urban", "Rural"))

elf_urban_rural_by_decade <- elf_urban_rural_by_decade %>%
  mutate(Urban = if_else(Urban == TRUE, "Urban", "Rural"))

results_by_geography_and_decade <- marriages_urban_rural_by_decade %>%
  left_join(y = elf_urban_rural_by_decade, join_by(Urban, DecadeMarried, Group_Type), suffix = c("_Eq2", "_Eq1"))

fig1 <- ggplot(results_by_geography_and_decade %>% filter(Group_Type == "Ethnic Block")) +
    geom_ribbon(aes(x = DecadeMarried,
                    ymin = avg_observed_rate_national_Eq2 - 1.96 * avg_observed_rate_national_se_Eq2,
                    ymax = avg_observed_rate_national_Eq2 + 1.96 * avg_observed_rate_national_se_Eq2, group = 1),
                fill = "gray", alpha = 1) +
  geom_ribbon(aes(x = DecadeMarried,
                    ymin = avg_expectation_national_Eq2 - 1.96 * avg_expectation_national_se_Eq2,
                    ymax = avg_expectation_national_Eq2 + 1.96 * avg_expectation_national_se_Eq2, group = 1),
                fill = "gray", alpha = 1) +
  geom_ribbon(aes(x = DecadeMarried,
                    ymin = avg_expectation_national_Eq2 - 1.96 * avg_expectation_national_se_Eq2,
                    ymax = avg_expectation_national_Eq2 + 1.96 * avg_expectation_national_se_Eq2, group = 1),
                fill = "gray", alpha = 1) +
    geom_line(aes(x = DecadeMarried, y = avg_observed_rate_national_Eq2, linetype = "Observed", group = 1)) +
  geom_line(aes(x = DecadeMarried, y = avg_expectation_national_Eq2, linetype = "Expected", group = 1)) +
    scale_linetype_manual(values = c("Observed" = "dotdash", "Expected" = "dashed")) +
    labs(
         title = "Changes in Observed and Expected Intermarriage Rates, by Urban/Rural Location (Ethnic Block)",
         x = "Decade Married",
         y = "Rate",
         linetype = "Measure") +
    theme_pubclean() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1), panel.spacing = unit(2, "lines")) +
  facet_wrap(~ Urban)
ggsave(here("figures", "Figure_1.png"), fig1, width = 8, height = 5, dpi = 300)

fig2 <- ggplot(results_by_geography_and_decade %>% filter(Group_Type == "Ethnic Group")) +
    geom_ribbon(aes(x = DecadeMarried,
                    ymin = avg_observed_rate_national_Eq2 - 1.96 * avg_observed_rate_national_se_Eq2,
                    ymax = avg_observed_rate_national_Eq2 + 1.96 * avg_observed_rate_national_se_Eq2, group = 1),
                fill = "gray", alpha = 1) +
  geom_ribbon(aes(x = DecadeMarried,
                    ymin = avg_expectation_national_Eq2 - 1.96 * avg_expectation_national_se_Eq2,
                    ymax = avg_expectation_national_Eq2 + 1.96 * avg_expectation_national_se_Eq2, group = 1),
                fill = "gray", alpha = 1) +
    geom_line(aes(x = DecadeMarried, y = avg_observed_rate_national_Eq2, linetype = "Observed", group = 1)) +
  geom_line(aes(x = DecadeMarried, y = avg_expectation_national_Eq2, linetype = "Expected", group = 1)) +
    scale_linetype_manual(values = c("Observed" = "dotdash", "Expected" = "dashed")) +
    labs(
         title = "Changes in Observed and Expected Intermarriage Rates, by Urban/Rural Location (Ethnic Group)",
         x = "Decade Married",
         y = "Rate",
         linetype = "Measure") +
    theme_pubclean() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1), panel.spacing = unit(2, "lines")) +
  facet_wrap(~ Urban)
ggsave(here("figures", "Figure_A3.png"), fig2, width = 8, height = 5, dpi = 300)

fig3 <- ggplot(results_by_geography_and_decade %>% filter(Group_Type == "Ethnic Block")) +
    geom_ribbon(aes(x = DecadeMarried,
                    ymin = I_group_share_national - 1.96 * I_group_share_national_se,
                    ymax = I_group_share_national + 1.96 * I_group_share_national_se, group = 1),
                fill = "gray", alpha = 1) +
  geom_ribbon(aes(x = DecadeMarried,
                    ymin = I_community_diversity_national - 1.96 * I_community_diversity_national_se,
                    ymax = I_community_diversity_national + 1.96 * I_community_diversity_national_se, group = 1),
                fill = "gray", alpha = 1) +
    geom_line(aes(x = DecadeMarried, y = I_group_share_national, linetype = "Group-Community", group = 1)) +
  geom_line(aes(x = DecadeMarried, y = I_community_diversity_national, linetype = "Community", group = 1)) +
    scale_linetype_manual(values = c("Group-Community" = "solid", "Community" = "dashed")) +
    labs(
         title = "Changes in Exposure-Adjusted Intermarriage Rates, by Urban/Rural Location (Ethnic Block)",
         x = "Decade Married",
         y = "Adjusted Intermarriage Rate",
         linetype = "Measure") +
    theme_pubclean() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1), panel.spacing = unit(2, "lines")) +
  facet_wrap(~ Urban)
ggsave(here("figures", "Figure_2.png"), fig3, width = 8, height = 5, dpi = 300)

fig4 <- ggplot(results_by_geography_and_decade %>% filter(Group_Type == "Ethnic Group")) +
    geom_ribbon(aes(x = DecadeMarried,
                    ymin = I_group_share_national - 1.96 * I_group_share_national_se,
                    ymax = I_group_share_national + 1.96 * I_group_share_national_se, group = 1),
                fill = "gray", alpha = 1) +
  geom_ribbon(aes(x = DecadeMarried,
                    ymin = I_community_diversity_national - 1.96 * I_community_diversity_national_se,
                    ymax = I_community_diversity_national + 1.96 * I_community_diversity_national_se, group = 1),
                fill = "gray", alpha = 1) +
    geom_line(aes(x = DecadeMarried, y = I_group_share_national, linetype = "Group-Community", group = 1)) +
  geom_line(aes(x = DecadeMarried, y = I_community_diversity_national, linetype = "Community", group = 1)) +
    scale_linetype_manual(values = c("Group-Community" = "solid", "Community" = "dashed")) +
    labs(
         title = "Changes in Exposure-Adjusted Intermarriage Rates, by Urban/Rural Location (Ethnic Group)",
         x = "Decade Married",
         y = "Adjusted Intermarriage Rate",
         linetype = "Measure") +
    theme_pubclean() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1), panel.spacing = unit(2, "lines")) +
  facet_wrap(~ Urban)
ggsave(here("figures", "Figure_A4.png"), fig4, width = 8, height = 5, dpi = 300)

time_invariant_results <- bind_rows(
  marriages_overall %>% select(Adjusted = I_group_share_national, Observed = avg_observed_rate_national, Exposure = avg_expectation_national, Group_Type) %>% mutate(method = "Group-Constituency"),
  elf_overall %>% select(Adjusted = I_community_diversity_national, Observed = avg_observed_rate_national, Exposure = avg_expectation_national, Group_Type) %>% mutate(method = "Constituency"),
  basic_summary %>% select(Adjusted = Basic_Adjusted, Observed = Basic_Observed, Exposure = Basic_ELF, Group_Type) %>% mutate(method = "National")
) %>%
reshape2::melt(id = c("Group_Type", "method"))

time_invariant_results

fig5 <- time_invariant_results %>%
  filter(Group_Type == "Ethnic Block") %>%
  mutate(
    method = factor(method, levels = c("National", "Constituency", "Group-Constituency")),
    variable = factor(variable, levels = c("Observed", "Exposure", "Adjusted"))
  ) %>%
  ggplot() +
  geom_bar(aes(x = variable, y = value, fill = variable), stat = "identity") +
  scale_fill_grey(start = 0.3, end = 0.7) +
  theme_pubclean() +
  rotate_x_text(45) +
  theme(legend.position = "none") +
  facet_wrap(~ method) +
  labs(title = "Results by method of calculating marriage market", y = "Value", x = "Outcome")
ggsave(here("figures", "Figure_A5.png"), fig5, width = 8, height = 5, dpi = 300)

table3_decade <- marriages_by_decade %>%
  select(DecadeMarried, Group_Type, Adjusted = I_group_share_national, Observed = avg_observed_rate_national, `N (couples)` = total_n_marriages) %>%
  mutate(Category = "Decade") %>%
  rename(Label = DecadeMarried)

table3_urban <- marriages_urban_rural %>%
  mutate(Label = if_else(Urban == TRUE, "Urban", "Rural")) %>%
  select(Label, Group_Type, Adjusted = I_group_share_national, Observed = avg_observed_rate_national, `N (couples)` = total_n_marriages) %>%
  mutate(Category = "Urban/Rural")

table3_province <- marriages_province %>%
  select(Label = Province, Group_Type, Adjusted = I_group_share_national, Observed = avg_observed_rate_national, `N (couples)` = total_n_marriages) %>%
  mutate(Category = "Province")

table3_overall <- marriages_overall %>%
  select(Group_Type, Adjusted = I_group_share_national, Observed = avg_observed_rate_national, `N (couples)` = total_n_marriages) %>%
  mutate(Category = "Overall", Label = "Countrywide")

table3_combined <- bind_rows(table3_overall, table3_decade, table3_urban, table3_province) %>%
  select(Category, Label, Group_Type, Observed, Adjusted, `N (couples)`) %>%
  arrange(Category, Group_Type, Label)

write_csv(table3_combined, here("out", "final", "Table_3_Intermarriage_Rates_in_Zambia.csv"))
write_csv(table3_combined, here("figures", "Table_3_Intermarriage_Rates_in_Zambia.csv"))
cat("\nSaved Table 3 to out/final/Table_3_Intermarriage_Rates_in_Zambia.csv\n")
